## The basic files and libraries needed for most presentations
# creates the libraries and common-functions sections
require(knitr) # provides read_chunk and kable (assumed; load explicitly)
read_chunk("../common/utility_functions.R")
require(ggplot2) #for plots
require(lattice) # nicer scatter plots
require(plyr) # for processing data.frames
require(grid) # contains the arrow function
require(biOps) # for basic image processing
require(doMC) # for parallel code
require(png) # for reading png images
require(gridExtra)
require(reshape2) # for the melt function
## To install EBImage
# source("http://bioconductor.org/biocLite.R")
# biocLite("EBImage")
require(EBImage) # for more image processing
used.libraries<-c("ggplot2","lattice","plyr","reshape2","grid","gridExtra","biOps","png","EBImage")
# start parallel environment
registerDoMC()
# functions for converting images back and forth
im.to.df<-function(in.img,out.col="val") {
out.im<-expand.grid(x=1:nrow(in.img),y=1:ncol(in.img))
out.im[,out.col]<-as.vector(in.img)
out.im
}
df.to.im<-function(in.df,val.col="val",inv=F) {
in.vals<-in.df[[val.col]]
if(is.logical(in.vals)) in.vals<-as.integer(in.vals*255) # logical masks become 0/255
if(inv) in.vals<-255-in.vals
out.mat<-matrix(in.vals,nrow=length(unique(in.df$x)),byrow=F)
attr(out.mat,"type")<-"grey"
out.mat
}
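As a quick check of these converters, a small matrix should survive the round trip (an illustrative example, not part of the original script):

```r
# illustrative round-trip through im.to.df and df.to.im
m <- matrix(0:5, nrow=2)
all(df.to.im(im.to.df(m)) == m)  # TRUE: the conversion is lossless
```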
ddply.cutcols<-function(...,cols=1) {
# run standard ddply command
cur.table<-ddply(...)
cutlabel.fixer<-function(oVal) {
sapply(oVal,function(x) {
cnv<-as.character(x)
mean(as.numeric(strsplit(substr(cnv,2,nchar(cnv)-1),",")[[1]]))
})
}
cutname.fixer<-function(c.str) {
s.str<-strsplit(c.str,"(",fixed=T)[[1]]
t.str<-strsplit(paste(s.str[c(2:length(s.str))],collapse="("),",")[[1]]
paste(t.str[1:(length(t.str)-1)],collapse=",") # drop the last comma-separated piece
}
for(i in c(1:cols)) {
cur.table[,i]<-cutlabel.fixer(cur.table[,i])
names(cur.table)[i]<-cutname.fixer(names(cur.table)[i])
}
cur.table
}
show.pngs.as.grid<-function(file.list,title.fun,zoom=1) {
preparePng<-function(x) rasterGrob(readPNG(x,native=T,info=T),width=unit(zoom,"npc"),interp=F)
labelPng<-function(x,title="junk") (qplot(1:300, 1:300, geom="blank",xlab=NULL,ylab=NULL,asp=1)+
annotation_custom(preparePng(x))+
labs(title=title)+theme_bw(24)+
theme(axis.text.x = element_blank(),
axis.text.y = element_blank()))
imgList<-llply(file.list,function(x) labelPng(x,title.fun(x)) )
do.call(grid.arrange,imgList)
}
## Standard image processing tools which I use for visualizing the examples in the script
commean.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
weight.sum<-sum(c.cell$weight)
data.frame(xv=mean(c.cell$x),
yv=mean(c.cell$y),
xm=with(c.cell,sum(x*weight)/weight.sum),
ym=with(c.cell,sum(y*weight)/weight.sum)
)
})
}
colMeans.df<-function(x,...) as.data.frame(t(colMeans(x,...)))
pca.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.cov<-cov(c.cell[,c("x","y")])
c.cell.eigen<-eigen(c.cell.cov)
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,
data.frame(vx=c.cell.eigen$vectors[1,],
vy=c.cell.eigen$vectors[2,],
vw=sqrt(c.cell.eigen$values),
th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,]))
)
})
}
vec.to.ellipse<-function(pca.df) {
ddply(pca.df,.(val),function(cur.pca) {
# assume there are two vectors now
create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"],
b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"],
th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]),
x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"])
})
}
# test function for ellipse generation
# ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal()
create.ellipse.points<-function(x.off=0,y.off=0,a=1,b=NULL,th.off=0,th.max=2*pi,pts=36,...) {
if (is.null(b)) b<-a
th<-seq(0,th.max,length.out=pts)
data.frame(x=a*cos(th.off)*cos(th)+b*sin(th.off)*sin(th)+x.off,
y=-1*a*sin(th.off)*cos(th)+b*cos(th.off)*sin(th)+y.off,
id=as.factor(paste(x.off,y.off,a,b,th.off,pts,sep=":")),...)
}
deform.ellipse.draw<-function(c.box) {
create.ellipse.points(x.off=c.box$x[1],
y.off=c.box$y[1],
a=c.box$a[1],
b=c.box$b[1],
th.off=c.box$th[1],
col=c.box$col[1])
}
bbox.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
xmn<-emin(c.cell$x)
xmx<-emax(c.cell$x)
ymn<-emin(c.cell$y)
ymx<-emax(c.cell$y)
out.df<-cbind(c.cell.mean,
data.frame(xi=c(xmn,xmn,xmx,xmx,xmn),
yi=c(ymn,ymx,ymx,ymn,ymn),
xw=xmx-xmn,
yw=ymx-ymn
))
})
}
# since the edge of the pixel is 0.5 away from the middle of the pixel
emin<-function(...) min(...)-0.5
emax<-function(...) max(...)+0.5
extents.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)),
xmax=c(c.cell.mean$x,emax(c.cell$x)),
ymin=c(emin(c.cell$y),c.cell.mean$y),
ymax=c(emax(c.cell$y),c.cell.mean$y)))
})
}
common.image.path<-"../common"
qbi.file<-function(file.name) file.path(common.image.path,"figures",file.name)
qbi.data<-function(file.name) file.path(common.image.path,"data",file.name)
th_fillmap.fn<-function(max.val) scale_fill_gradientn(colours=rainbow(10),limits=c(0,max.val))
author: Kevin Mader
date: 26 March 2015
width: 1440
height: 900
css: ../common/template.css
transition: rotate
ETHZ: 227-0966-00L
source('../common/schedule.R')
Segmentation and all the steps leading up to it are really a specialized type of learning problem.
lout<-20
b.grid<-expand.grid(x=seq(-10,10,length.out=lout),y=seq(-10,10,length.out=lout))
ring.image<-mutate(
b.grid,
Intensity = abs(cos(sqrt(x^2+y^2)/15*6.28))
)
ring.image.dist<-ddply(ring.image,.(x,y),function(in.pts) {
in.pts$Distance<-with(in.pts,sqrt(
min(
x^2+y^2
)
))
in.pts
})
ring.image.seg<-mutate(ring.image.dist,
Segmented = ifelse(
Intensity>0.5 & Distance>5 & Distance<10,
"Foreground","Background")
)
ring.image.seg<-mutate(ring.image.seg,
x=round(x+11),
y=round(y+11))
ring.im<-df.to.im(mutate(
b.grid,
val = 127*(abs(cos(sqrt(x^2+y^2)/15*6.28))+runif(nrow(b.grid)))
))
ring.img<-im.to.df(ring.im,out.col="Intensity")
blur.img<-im.to.df(imgBlur(ring.im),out.col="Intensity")
med.img<-im.to.df(imgBlockMedianFilter(ring.im),out.col="Intensity")
simple.image<-ggplot(ring.img,aes(x,y,fill=Intensity))+
geom_tile(color="black")+
coord_equal()+
theme_bw(15)
simple.image
What does identify mean?
How do we quantify this?
Try a number of different threshold values on the image and compare them to the original classification
mk.th.img<-function(in.img,steps,smin=0.3,smax=0.7) ldply(seq(smin,smax,length.out=steps),function(c.thresh) {
mutate(in.img,
Segmented = ifelse(
Intensity>c.thresh*255,
"Foreground","Background"),
Thresh = c.thresh
)
})
ggplot(mk.th.img(ring.img,9),aes(x,y,fill=Segmented))+
geom_tile(color="black")+
coord_equal()+
facet_wrap(~Thresh)+
theme_bw(15)
mk.stat.table<-function(in.img,steps,smin=0.3,smax=0.7)
ddply(mk.th.img(in.img,steps,smin,smax),.(Thresh),function(c.img) {
in.df<-merge(c.img[,c("x","y","Segmented")],
ring.image.seg[,c("x","y","Segmented")],
by=c("x","y"),suffixes=c("C","GT"))
in.df<-mutate(in.df,Match=(SegmentedC==SegmentedGT))
data.frame(TP=nrow(subset(in.df,SegmentedGT=="Foreground" & Match)),
TN=nrow(subset(in.df,SegmentedGT=="Background" & Match)),
FP=nrow(subset(in.df,SegmentedGT=="Background" & !Match)), # called foreground, actually background
FN=nrow(subset(in.df,SegmentedGT=="Foreground" & !Match)) # called background, actually foreground
)
})
tp.table<-mk.stat.table(ring.img,6,smin=0,smax=1)
kable(tp.table)
| Thresh | TP | TN | FP | FN |
|---|---|---|---|---|
| 0.0 | 200 | 0 | 161 | 0 |
| 0.2 | 195 | 17 | 144 | 5 |
| 0.4 | 172 | 68 | 93 | 28 |
| 0.6 | 110 | 119 | 42 | 90 |
| 0.8 | 44 | 155 | 6 | 156 |
| 1.0 | 0 | 161 | 0 | 200 |
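From the same counts we can also compute the overall accuracy at each threshold; this short addition (not in the original analysis) reuses the `tp.table` computed above:

```r
# overall accuracy per threshold (illustrative addition)
mutate(tp.table, Accuracy=(TP+TN)/(TP+TN+FP+FN))
```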
ggplot(melt(tp.table,id.vars=c("Thresh")),
aes(x=Thresh*100,y=100*value/200,color=variable))+
geom_point()+
geom_path()+
labs(color="",y="Percent (%)",x="Threshold Value (%)")+
theme_bw(20)
prec.table<-mutate(mk.stat.table(ring.img,6),
Precision=TP/(TP+FP),
Recall=TP/(TP+FN)
)
kable(prec.table)
| Thresh | TP | TN | FP | FN | Precision | Recall |
|---|---|---|---|---|---|---|
| 0.30 | 185 | 42 | 119 | 15 | 0.6085526 | 0.925 |
| 0.38 | 175 | 58 | 103 | 25 | 0.6294964 | 0.875 |
| 0.46 | 157 | 87 | 74 | 43 | 0.6796537 | 0.785 |
| 0.54 | 130 | 104 | 57 | 70 | 0.6951872 | 0.650 |
| 0.62 | 102 | 125 | 36 | 98 | 0.7391304 | 0.510 |
| 0.70 | 79 | 143 | 18 | 121 | 0.8144330 | 0.395 |
The receiver operating characteristic (ROC) was first developed during World War 2 to evaluate how well radar operators detected objects on the battlefield. In this precision-recall form, the ideal is the top-right corner (identify everything and miss nothing).
prec.table<-mutate(mk.stat.table(ring.img,100),
Precision=TP/(TP+FP),
Recall=TP/(TP+FN)
)
ggplot(prec.table,aes(x=Recall,y=Precision))+
geom_point(aes(color=Thresh,shape="Image"),size=5)+
geom_point(data=data.frame(Recall=1,Precision=1),
aes(shape="Ideal"),size=5,color="red")+
geom_path()+
#coord_equal()+
scale_color_gradientn(colours=rainbow(6))+
labs(shape="")+
theme_bw(20)
We can then use this ROC curve to compare different filters (or even entire workflows): the larger the area under the curve, the better the approach.
all.img<-rbind(
cbind(mk.th.img(ring.img,3), name="Normal"),
cbind(mk.th.img(blur.img,3), name="Gaussian"),
cbind(mk.th.img(med.img,3), name="Median")
)
simple.image<-ggplot(all.img,aes(x,y,fill=Segmented))+
geom_tile(color="black")+
coord_equal()+
facet_grid(name~Thresh)+
theme_bw(15)
simple.image
Different approaches can then be compared by the area under the curve.
stab<-function(img) mk.stat.table(img,100,smin=0.05,smax=0.95)
all.table<-rbind(
cbind(stab(ring.img), name="Normal"),
cbind(stab(blur.img), name="Gaussian"),
cbind(stab(med.img), name="Median")
)
prec.table<-mutate(all.table,
Precision=TP/(TP+FP),
Recall=TP/(TP+FN)
)
ggplot(prec.table,aes(x=Recall,y=Precision,color=name))+
geom_point(aes(shape="Image"),size=5)+
geom_point(data=data.frame(Recall=1,Precision=1),
aes(shape="Ideal"),size=5,color="red")+
geom_path()+
#coord_equal()+
labs(shape="",color="Filter")+
theme_bw(20)
Another way of showing the ROC curve (more common in machine learning than in medical diagnosis) is to plot the true positive rate against the false positive rate.
These curves show very similar information, with the major difference being that the goal is now the upper left-hand corner. Additionally, random guessing appears as the line with slope 1, so for a system to be useful it must lie above that random line.
tp.table<-mutate(all.table,
TPRate=TP/(TP+FN),
FPRate=FP/(FP+TN)
)
ggplot(tp.table,aes(x=FPRate,y=TPRate,color=name))+
geom_point(aes(shape="Image"),size=2)+
geom_point(data=data.frame(TPRate=1,FPRate=0),
aes(shape="Ideal"),size=5,color="red")+
geom_path()+
geom_segment(x=0,xend=1,y=0,yend=1,aes(color="Random Guess"))+
xlim(0,1)+ylim(0,1)+coord_equal()+
labs(shape="",x="False Positive Rate",y="True Positive Rate",linetype="")+
theme_bw(20)
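To reduce each curve to a single number, the area under the curve can be approximated with the trapezoid rule. A minimal sketch, assuming the `tp.table` with `TPRate`/`FPRate` columns computed above (`auc.fn` is a hypothetical helper, not part of the original script):

```r
# trapezoid-rule area under the ROC curve, per filter
auc.fn <- function(fpr, tpr) {
  ord <- order(fpr)  # integrate left to right
  sum(diff(fpr[ord]) * (head(tpr[ord],-1) + tail(tpr[ord],-1)) / 2)
}
ddply(tp.table, .(name), function(c.tab)
  data.frame(AUC=auc.fn(c.tab$FPRate, c.tab$TPRate)))
```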
We have dramatically simplified our data, but there is still too much.
56GB / sample
\[\downarrow\]
(1.75GB / sample)
Once we have a clearly segmented image, it is often helpful to identify its sub-components. The easiest method for identifying these sub-components is called component labeling, which again uses the neighborhood \(\mathcal{N}\) as a criterion for connectivity: pixels which touch become part of the same object.
In general, the approach works well, since regions which touch are usually related. It runs into issues when multiple regions agglomerate, for example a continuous pore network (one object) or a cluster of touching cells.
The more general formulation of the problem comes from networks (roads, computers, social): are the points *Begin* and *End* connected?
nx<-5
ny<-10
cross.im<-expand.grid(x=c(-nx:nx),y=c(-ny:ny))
max.nodes.fn<-function(x,y) 1+round(abs(x+y/2)) # non-homogeneous network connectivity
cross.im<-ddply(cross.im,.(x,y), function(in.pt) {
out.df<-data.frame()
max.nodes<-max.nodes.fn(in.pt[1,"x"],in.pt[1,"y"])
for(i in seq_len(round(runif(1,max=max.nodes)))) { # seq_len avoids the 1:0 trap when zero edges are drawn
c.angle<-runif(1,max=pi)
out.df<-rbind(out.df,data.frame(
xv=round(cos(c.angle)),
yv=round(sin(c.angle))
))
}
out.df
})
id.fn<-function(x,y) (y*100+x)
cross.im$id<-with(cross.im,id.fn(x,y))
cross.lab.im<-cbind(cross.im,label="Normal")
cross.lab.im$label<-as.character(cross.lab.im$label)
cross.lab.im[which(cross.lab.im$id==cross.lab.im[1,]$id),"label"]<-"Begin"
cross.lab.im[which(cross.lab.im$id==cross.lab.im[nrow(cross.lab.im),]$id),"label"]<-"End"
cross.lab.im$label<-as.factor(cross.lab.im$label)
ggplot(cross.lab.im,aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=label),size=5)+geom_segment()+
theme_bw(20)+labs(color="Goal")
We start out with the network and grow *Begin* to its connections using a brushfire-style algorithm.
# Network traversing algorithm
iter.grow<-function(ncross.lab.im,iter.cnt=10,source.phase="Begin",target.phase=NA,grow.into.phase="Normal") {
if(is.na(target.phase)) target.phase<-source.phase
for(iters in 1:iter.cnt) {
begin.pts<-subset(ncross.lab.im,label==source.phase | label==target.phase)
begin.ids<-unique(begin.pts$id)
begin.pts.term<-subset(ncross.lab.im,id.fn(x+xv,y+yv) %in% begin.ids)
begin.pts<-rbind(begin.pts,
begin.pts.term
)
ncross.lab.im$label<-as.character(ncross.lab.im$label)
for(i in 1:nrow(begin.pts)) {
cur.pt<-begin.pts[i,]
child.id<-id.fn(cur.pt$x+cur.pt$xv,cur.pt$y+cur.pt$yv)
switch.pixs<-which(ncross.lab.im$id==child.id &
ncross.lab.im$label == grow.into.phase)
if(length(switch.pixs)>0) ncross.lab.im[switch.pixs,"label"]<-target.phase # >0 so single pixels also switch
}
switch.pixs<-which(ncross.lab.im$id %in% begin.pts.term$id &
ncross.lab.im$label == grow.into.phase)
if(length(switch.pixs)>0) ncross.lab.im[switch.pixs,"label"]<-target.phase
}
ncross.lab.im$label<-as.factor(ncross.lab.im$label)
ncross.lab.im
}
ggplot(iter.grow(cross.lab.im,1,target.phase="Pixels Grown"),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=label),size=5)+geom_segment()+
theme_bw(20)+labs(color="Type",title="1 Iteration from Begin")
ggplot(iter.grow(cross.lab.im,10,target.phase="Pixels Grown"),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=label),size=3)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="10 iterations from Begin")
ggplot(iter.grow(cross.lab.im,10,source.phase="End",target.phase="Pixels Grown"),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=label),size=3)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="10 iterations from End")
ggplot(iter.grow(cross.lab.im,round(2*sqrt(20^2+10^2)),target.phase="Pixels Grown"),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=label),size=5)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="Diagonal iterations from Begin")
The procedure is the same as for networks, but the neighborhood is defined by a kernel (circular, full, line, etc.) and labels must first be generated for the image:
1. Assign a unique label to each point in the image: \[ L(x,y) = y\cdot\text{Im}_{width}+x \]
2. For each point \((x,y)\), replace \(L(x,y)\) by the minimum of \(L\) over the neighborhood \(\mathcal{N}(x,y)\)
3. Repeat until no more \(L\) values are changed (a minimal matrix sketch of this procedure follows)
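Here is that sketch on a hypothetical binary matrix `mask` (illustrative only; the grid-based demonstration below applies the same idea to a network):

```r
# min-label propagation on a small binary image (8-connectivity)
mask <- matrix(c(1,1,0,0,
                 0,0,0,1,
                 1,0,1,1), nrow=3, byrow=TRUE) == 1
lab <- matrix(seq_along(mask), nrow=nrow(mask))  # unique label per pixel
lab[!mask] <- NA
repeat {
  old <- lab
  for (x in 1:nrow(lab)) for (y in 1:ncol(lab)) {
    if (is.na(lab[x,y])) next
    nbr <- lab[max(1,x-1):min(nrow(lab),x+1), max(1,y-1):min(ncol(lab),y+1)]
    lab[x,y] <- min(nbr, na.rm=TRUE)  # minimum label in the neighborhood
  }
  if (identical(old, lab)) break  # stop when no labels change
}
lab  # connected pixels now share the smallest label in their component
```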
nx<-10
ny<-5
ang.vals<-seq(0,pi/2,length.out=2)
cross.im<-expand.grid(x=c(-nx:nx),y=c(-ny:ny))
cross.im<-ddply(cross.im,.(x,y), function(in.pt) {
out.df<-data.frame(xv=c(),yv=c())
for(i in 1:length(ang.vals)) {
c.angle<-ang.vals[i]
out.df<-rbind(out.df,data.frame(
xv=round(cos(c.angle)),
yv=round(sin(c.angle))
))
}
out.df
})
id.fn<-function(x,y) (y*100+x)
cross.im$id<-with(cross.im,id.fn(x,y))
cross.im<-subset(cross.im,x!=0 & y!=0)
ggplot(cross.im,aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=as.factor(id)),size=5)+geom_segment()+
theme_bw(20)+labs(color="Goal")
# Component labeling algorithm
im.iter.grow<-function(icross.im,iter.cnt=10) {
ncross.im<-icross.im
for(iters in 1:iter.cnt) {
begin.pts<-ncross.im
for(i in 1:nrow(begin.pts)) {
cur.pt<-begin.pts[i,]
child.id<-id.fn(cur.pt$x+cur.pt$xv,cur.pt$y+cur.pt$yv)
# fill into lower points
switch.pixs<-which(icross.im$id==child.id &
icross.im$id > cur.pt$id)
if(length(switch.pixs)>0) ncross.im[switch.pixs,"id"]<-cur.pt$id # >0 so single pixels also switch
}
icross.im<-ncross.im
}
ncross.im
}
ggplot(im.iter.grow(cross.im,2),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=as.factor(id)),size=3)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="2 Iterations")
ggplot(im.iter.grow(cross.im,3),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=as.factor(id)),size=3)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="3 Iterations")
The image converges very quickly, and after 4 iterations the task is complete. For larger, more complicated images with thousands of components this can take much longer, but far more efficient labeling algorithms exist which alleviate the issue.
ggplot(im.iter.grow(cross.im,4),aes(x=x,y=y,xend=x+xv,yend=y+yv))+
geom_point(aes(color=as.factor(id)),size=5)+geom_segment()+coord_equal()+
theme_bw(20)+labs(color="Type",title="4 iterations")
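The lecture only states that faster algorithms exist; one common choice (named here as an assumption, not from the original slides) is union-find / disjoint-set labeling, which needs only a single pass over the pixels plus cheap lookups. A minimal sketch for a logical matrix:

```r
# a minimal union-find labeling sketch (illustrative, 4-connectivity)
label.union.find <- function(im) {     # im: logical matrix
  parent <- seq_along(im)
  find <- function(i) { while (parent[i] != i) i <- parent[i]; i }
  join <- function(i, j) parent[find(i)] <<- find(j)
  nr <- nrow(im)
  for (idx in which(im)) {
    if (idx %% nr != 1 && im[idx-1]) join(idx, idx-1)   # pixel in the previous row
    if (idx > nr && im[idx-nr])      join(idx, idx-nr)  # pixel in the previous column
  }
  labels <- matrix(0, nrow=nr, ncol=ncol(im))
  labels[which(im)] <- sapply(which(im), find)  # root index becomes the label
  labels
}
```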
In practice, component labeling algorithms come already implemented in most image analysis libraries, but it is important to understand how they work and how they depend on the neighborhood in order to interpret the results correctly.
labImg<-bwlabel(imName) # EBImage's built-in component labeling
cell.im<-jpeg::readJPEG("ext-figures/Cell_Colony.jpg")
cell.lab.df<-im.to.df(bwlabel(cell.im<.6))
ggplot(subset(cell.lab.df,val>0),aes(x=x,y=y,fill=val))+
geom_raster()+
labs(fill="Label",title="Labeled Image")+
theme_bw(20)+coord_equal()
size.histogram<-ddply(subset(cell.lab.df,val>0),.(val),function(c.label) data.frame(count=nrow(c.label)))
keep.vals<-subset(size.histogram,count>25)
ggplot(size.histogram,aes(x=count))+
geom_density(aes(color="Entire Labeled Images"))+
geom_density(data=keep.vals,aes(color="Larger Cells"))+
labs(x="Cell Volume",y="Frequency of Cells",title="Size Distribution",color="Distribution")+
scale_y_sqrt()+
theme_bw(20)
ggplot(subset(cell.lab.df,val %in% keep.vals$val),aes(x=x,y=y,fill=as.factor(val)))+
geom_raster()+
labs(fill="Label",title="Larger Cells (>25px)")+ # fill shows the component label, not intensity
theme_bw(20)+coord_equal()
Now all the voxels which are connected have the same label. We can then compute simple per-object metrics, like the number of voxels in each object (its volume).
Next week we will cover how more detailed analysis can be performed on these data.
cell.im<-jpeg::readJPEG("ext-figures/Cell_Colony.jpg")
cell.lab.df<-im.to.df(bwlabel(cell.im<.6))
size.histogram<-ddply(subset(cell.lab.df,val>0),.(val),function(c.label) data.frame(count=nrow(c.label)))
keep.vals<-subset(size.histogram,count>25)
cur.cell.id<-subset(cell.lab.df,val %in% keep.vals$val)$val[1]
cur.cell.df<-subset(cell.lab.df,val==cur.cell.id)
ggplot(subset(cell.lab.df,val %in% keep.vals$val),aes(x=x,y=y,fill=as.numeric(as.factor(val))))+
geom_raster()+
geom_tile(data=cur.cell.df,fill="red",alpha=0.5)+
labs(fill="Label",title="Larger Cells (>25px)")+
theme_bw(20)+coord_equal()
\[ I_{id}(x,y) = \begin{cases} 1, & L(x,y) = id \\ 0, & \text{otherwise} \end{cases}\]
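In code, the indicator image for the selected label is just a comparison (an illustrative line mirroring the formula above; `cur.cell.mask` is a hypothetical name):

```r
# I_id: 1 where the label matches cur.cell.id, 0 elsewhere
cur.cell.mask <- mutate(cell.lab.df, val=ifelse(val==cur.cell.id, 1, 0))
```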
mean.df<-colMeans.df(cur.cell.df[,c("x","y")])
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_point(data=mean.df,color="red",pch=3,size=20)+
labs(title="Single Cell")+
theme_bw(20)+coord_equal()+guides(fill=F)
\[ \bar{x} = \frac{1}{N} \sum_{\vec{v}\in I_{id}} \vec{v}\cdot\vec{i} \] \[ \bar{y} = \frac{1}{N} \sum_{\vec{v}\in I_{id}} \vec{v}\cdot\vec{j} \] \[ \bar{z} = \frac{1}{N} \sum_{\vec{v}\in I_{id}} \vec{v}\cdot\vec{k} \]
If the gray values are kept (or other meaningful ones are used), this can be seen as a weighted center of volume or center of mass (using \(I_{gy}\) to distinguish it from the labels)
commean.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
weight.sum<-sum(c.cell$weight)
data.frame(xv=mean(c.cell$x),
yv=mean(c.cell$y),
xm=with(c.cell,sum(x*weight)/weight.sum),
ym=with(c.cell,sum(y*weight)/weight.sum)
)
})
}
cur.cell.df.weight<-cbind(cur.cell.df,weight=with(cur.cell.df,(x-17)^2+(y-9)^2))
commean.df<-commean.fun(cur.cell.df.weight)
ggplot(cur.cell.df.weight,aes(x=x,y=y))+
geom_tile(aes(fill=weight),color="black")+
geom_point(data=commean.df,aes(x=xv,y=yv,color="COV"),pch=16,size=20)+
geom_point(data=commean.df,aes(x=xm,y=ym,color="COM"),pch=16,size=20)+
labs(title="Single Cell",color="Center",fill="Igy")+
theme_bw(20)+coord_equal()#+guides(fill=T)
\[ \Sigma I_{gy} = \frac{1}{N} \sum_{\vec{v}\in I_{id}} I_{gy}(\vec{v}) \] \[ \bar{x} = \frac{1}{\Sigma I_{gy}} \sum_{\vec{v}\in I_{id}} (\vec{v}\cdot\vec{i}) I_{gy}(\vec{v}) \] \[ \bar{y} = \frac{1}{\Sigma I_{gy}} \sum_{\vec{v}\in I_{id}} (\vec{v}\cdot\vec{j}) I_{gy}(\vec{v}) \] \[ \bar{z} = \frac{1}{\Sigma I_{gy}} \sum_{\vec{v}\in I_{id}} (\vec{v}\cdot\vec{k}) I_{gy}(\vec{v}) \]
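As a sanity check (a hypothetical example, not in the original script): with uniform weights the center of mass reduces to the center of volume.

```r
# with weight = 1 everywhere, COM (xm,ym) equals COV (xv,yv)
commean.fun(cbind(cur.cell.df, weight=1))
```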
Extents, or caliper lengths, are the size of the object in a given direction. Since the coordinates of our image are \(x\) and \(y\), the extents are calculated in those directions.
# since the edge of the pixel is 0.5 away from the middle of the pixel
emin<-function(...) min(...)-0.5
emax<-function(...) max(...)+0.5
extents.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,data.frame(xmin=c(c.cell.mean$x,emin(c.cell$x)),
xmax=c(c.cell.mean$x,emax(c.cell$x)),
ymin=c(emin(c.cell$y),c.cell.mean$y),
ymax=c(emax(c.cell$y),c.cell.mean$y)))
})
}
cell.extents<-extents.fun(cur.cell.df)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_segment(data=cell.extents,aes(x=xmin,y=ymin, xend=xmax,yend=ymax),color="red",size=2,lineend="square")+
labs(title="Single Cell")+
theme_bw(20)+coord_equal()+guides(fill=F)
Define extents as the minimum and maximum values along the projection of the shape in each direction \[ \text{Ext}_x = \left\{ \forall \vec{v}\in I_{id}: max(\vec{v}\cdot\vec{i})-min(\vec{v}\cdot\vec{i}) \right\} \] \[ \text{Ext}_y = \left\{ \forall \vec{v}\in I_{id}: max(\vec{v}\cdot\vec{j})-min(\vec{v}\cdot\vec{j}) \right\} \] \[ \text{Ext}_z = \left\{ \forall \vec{v}\in I_{id}: max(\vec{v}\cdot\vec{k})-min(\vec{v}\cdot\vec{k}) \right\} \]
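The scalar extents for the single cell follow directly from `emin`/`emax` (an illustrative one-liner, not part of the original script):

```r
# caliper lengths along x and y, including the half-pixel edge correction
with(cur.cell.df, data.frame(Ext.x=emax(x)-emin(x), Ext.y=emax(y)-emin(y)))
```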
By definition (New Oxford American), anisotropic means varying in magnitude according to the direction of measurement.
Because the definition is so vague, anisotropy can be mathematically characterized in many different, far-from-equivalent ways (in all cases 0 represents a sphere):
\[ Aiso1 = \frac{\text{Longest Side}}{\text{Shortest Side}} - 1 \]
\[ Aiso2 = \frac{\text{Longest Side}-\text{Shortest Side}}{\text{Longest Side}} \]
\[ Aiso3 = \frac{\text{Longest Side}}{\text{Average Side Length}} - 1 \]
\[ Aiso4 = \frac{\text{Longest Side}-\text{Shortest Side}}{\text{Average Side Length}} \]
\[ \cdots \rightarrow \text{ ad nauseam} \]
Let's take some sample objects
# test function for ellipse generation
# ggplot(ldply(seq(-pi,pi,length.out=100),function(th) create.ellipse.points(a=1,b=2,th.off=th,th.val=th)),aes(x=x,y=y))+geom_path()+facet_wrap(~th.val)+coord_equal()
create.ellipse.points<-function(x.off=0,y.off=0,a=1,b=NULL,th.off=0,th.max=2*pi,pts=36,...) {
if (is.null(b)) b<-a
th<-seq(0,th.max,length.out=pts)
data.frame(x=a*cos(th.off)*cos(th)+b*sin(th.off)*sin(th)+x.off,
y=-1*a*sin(th.off)*cos(th)+b*cos(th.off)*sin(th)+y.off,
id=as.factor(paste(x.off,y.off,a,b,th.off,pts,sep=":")),...)
}
deform.ellipse.draw<-function(c.box) {
create.ellipse.points(x.off=c.box$x[1],
y.off=c.box$y[1],
a=c.box$a[1],
b=c.box$b[1],
th.off=c.box$th[1],
col=c.box$col[1])
}
test.objs<-data.frame(x=0,y=0,th.off=0,a=5,b=rev(c(1e-3,1e-2,1e-1,1,2,3,4,5)))
test.objs$col<-1:nrow(test.objs)
test.objs$x.extents<-2*test.objs$a
test.objs$y.extents<-2*test.objs$b
ggplot(ddply(test.objs,.(col),deform.ellipse.draw),
aes(x=x,y=y,group=as.factor(id)))+
geom_polygon(color="black",fill="blue")+coord_equal()+
facet_grid(col~.)+theme_bw(15)
s.aiso<-function(lside,sside) (lside/sside-1)
aiso<-function(lside,sside) ((lside-sside)/lside)
a.aiso<-function(lside,sside) (2*lside/(lside+sside)-1)
as.aiso<-function(lside,sside) (2*(lside-sside)/(lside+sside))
awrap<-function(c.fun) function(in.ell.dim) c.fun(pmax(2*in.ell.dim$a,2*in.ell.dim$b),pmin(2*in.ell.dim$a,2*in.ell.dim$b))
aiso.table.fn<-function(in.objs) {
ddply(in.objs,.(b),function(c.ell) {
data.frame(Aiso1=awrap(s.aiso)(c.ell),
Aiso2=awrap(aiso)(c.ell),
Aiso3=awrap(a.aiso)(c.ell),
Aiso4=awrap(as.aiso)(c.ell))
})
}
aiso.table<-aiso.table.fn(test.objs)
names(aiso.table)[1]<-"Y Semi-Axis (b)" # the grouping column holds the semi-axis b, not the full extent 2b
kable(aiso.table,digits=2)
| Y Semi-Axis (b) | Aiso1 | Aiso2 | Aiso3 | Aiso4 |
|---|---|---|---|---|
| 0.00 | 4999.00 | 1.00 | 1.00 | 2.00 |
| 0.01 | 499.00 | 1.00 | 1.00 | 1.99 |
| 0.10 | 49.00 | 0.98 | 0.96 | 1.92 |
| 1.00 | 4.00 | 0.80 | 0.67 | 1.33 |
| 2.00 | 1.50 | 0.60 | 0.43 | 0.86 |
| 3.00 | 0.67 | 0.40 | 0.25 | 0.50 |
| 4.00 | 0.25 | 0.20 | 0.11 | 0.22 |
| 5.00 | 0.00 | 0.00 | 0.00 | 0.00 |
ggplot(ddply(test.objs,.(col),deform.ellipse.draw),
aes(x=x,y=y,group=as.factor(id)))+
geom_polygon(color="black",fill="blue")+coord_equal()+
facet_wrap(~col)+theme_bw(15)
names(aiso.table)[1]<-"y.extent" ggplot(aiso.table,aes(x=y.extent))+ geom_line(aes(y=Aiso1,color="Aiso1"))+ geom_line(aes(y=Aiso2,color="Aiso2"))+ geom_line(aes(y=Aiso3,color="Aiso3"))+ geom_line(aes(y=Aiso4,color="Aiso4"))+ scale_y_log10()+ labs(x="Y extents",y="Anisotropy",color="Metric")+ theme_bw(20)
Objects with uniformly distributed, independent \(x\) and \(y\) extents
m.count<-5000
many.objs<-data.frame(x=0,y=0,th.off=0,a=runif(m.count,0,20),b=runif(m.count,0,20))
many.objs$col<-1:nrow(many.objs)
many.objs$x.extents<-2*many.objs$a
many.objs$y.extents<-2*many.objs$b
maiso.table<-aiso.table.fn(many.objs)
ggplot(ddply(subset(many.objs,col<16),.(col),deform.ellipse.draw),
aes(x=x,y=y,group=as.factor(id)))+
geom_polygon(color="black",fill="blue")+coord_equal()+
facet_wrap(~col,ncol=5)+theme_bw(15)
ggplot(many.objs)+
geom_density(aes(x=x.extents,color="X"),size=2)+
geom_density(aes(x=y.extents,color="Y"),size=2)+
#scale_x_log10()+
labs(x="Extent Length (pixels)",color="Axis")+
guides(fill=F)+
xlim(0,80)+
theme_bw(20)
splom(subset(maiso.table,Aiso1<10)[,c("Aiso1","Aiso2","Aiso3","Aiso4")],pch=16)
ggplot(maiso.table)+
geom_density(aes(x=Aiso1,color="Aiso1"),size=2)+
geom_density(aes(x=Aiso2,color="Aiso2"),fill="green",alpha=0.5)+
geom_density(aes(x=Aiso3,color="Aiso3"),size=2)+
geom_density(aes(x=Aiso4,color="Aiso4"),size=2)+
#scale_x_log10()+
labs(x="Anisotropy",color="Metric")+
xlim(0,2)+guides(fill=F)+
theme_bw(20)
ggplot(subset(cell.lab.df,val %in% keep.vals$val),aes(x=x,y=y,
fill=as.numeric(as.factor(val))))+
geom_raster()+
geom_tile(data=cur.cell.df,fill="red",alpha=0.5)+
labs(fill="Label",title="Labeled Cells")+
theme_bw(20)#+coord_equal()
cell.extents.all<-extents.fun(subset(cell.lab.df,val %in% keep.vals$val))
ggplot(cell.extents.all,aes(x=x,y=y))+
geom_segment(aes(x=xmin,y=ymin,xend=xmax,yend=ymax),color="red",lineend="square")+
geom_point(aes(color=as.numeric(as.factor(val))),size=2)+
labs(title="COM and Extents",color="Label")+
theme_bw(20)#+coord_equal()
mean.pos.subfun<-function(in.df) {
ddply(in.df,.(val),function(c.cell) {
c.mean<-colMeans.df(c.cell)
data.frame(x=c.cell$x-c.mean$x,y=c.cell$y-c.mean$y)
})
}
mean.sub.df<-mean.pos.subfun(subset(cell.lab.df,val %in% keep.vals$val))
ggplot(cell.extents.all,aes(x=x-x,y=y-y))+
geom_raster(data=mean.sub.df,aes(x=x,y=y))+
geom_segment(aes(x=xmin-x,y=ymin-y, xend=xmax-x,yend=ymax-y),
color="red")+
geom_point(color="red",size=2)+
facet_wrap(~val,ncol=14)+
labs(x="x - COM",y="y - COM")+
theme_bw(15)+coord_equal()
bbox.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
xmn<-emin(c.cell$x)
xmx<-emax(c.cell$x)
ymn<-emin(c.cell$y)
ymx<-emax(c.cell$y)
out.df<-cbind(c.cell.mean,
data.frame(xi=c(xmn,xmn,xmx,xmx,xmn),
yi=c(ymn,ymx,ymx,ymn,ymn),
xw=xmx-xmn,
yw=ymx-ymn
))
})
}
cell.bbox.all<-bbox.fun(subset(cell.lab.df,val %in% keep.vals$val))
ggplot(cell.bbox.all,aes(x=x-x,y=y-y))+
geom_raster(data=mean.sub.df,aes(x=x,y=y))+
geom_path(aes(x=xi-x,y=yi-y),
color="red")+
geom_point(color="red",size=2)+
facet_wrap(~val,ncol=14)+
labs(x="x - COM",y="y - COM")+
theme_bw(15)+coord_equal()
# adding val/1e5 makes each anisotropy value unique so facet_wrap gives every cell its own panel
cell.bbox.all$aiso<-with(cell.bbox.all,(pmax(xw,yw)-pmin(xw,yw))/pmax(xw,yw)+val/1e5)
ggplot(cell.bbox.all,aes(x=x-x,y=y-y))+
#geom_raster(data=mean.sub.df,aes(x=x,y=y))+
geom_path(aes(x=xi-x,y=yi-y,group=val),
color="red")+
geom_point(color="red",size=2)+
facet_wrap(~aiso,ncol=14)+
labs(x="x - COM",y="y - COM")+
theme_bw(15)+coord_equal()
While easy to calculate, the bounding box / extents approach is a very rough approximation for most of the objects in our image. In particular, objects which are not aligned with the \(XY\)-axes are misrepresented.
While many of the topics covered in linear algebra and statistics courses might not seem applicable to real problems at first glance, at least a few of them come in handy when dealing with distributions of pixels. (They will only be covered briefly here; for a detailed review, look at some of the suggested material.)
Principal component analysis (PCA) is similar to k-means insofar as we start with a series of points in a vector space and want to condense the information. With PCA, instead of searching for distinct groups, we try to find a linear combination of components which best explains the variance in the system.
As an example, we will use a very simple dataset of corn and chicken prices over time.
n.pts<-20
test.pca<-data.frame(time=c(1:n.pts),
corn.price=0.01*runif(n.pts)+0.05)
test.pca$chicken.price<-with(test.pca,corn.price+0.01*runif(n.pts))
ggplot(test.pca,aes(x=time))+
geom_line(aes(y=corn.price,color="Corn Price ($/kg)"))+
geom_line(aes(y=chicken.price,color="Chicken Price ($/g)"))+
labs(x="Time (days)",y="Price",color="Commodity")+
theme_bw(20)
test.pca.princomp<-princomp(test.pca[,-1])
test.pca$pca1<-test.pca.princomp$scores[,1]
test.pca$pca2<-test.pca.princomp$scores[,2]
ggplot(test.pca,aes(x=time))+
geom_line(aes(y=pca1,color="Principal Component #1"))+
geom_line(aes(y=pca2,color="Principal Component #2"))+
geom_line(aes(y=corn.price,color="Corn Price ($/kg)"))+
geom_line(aes(y=chicken.price,color="Chicken Price ($/g)"))+
labs(x="Time (days)",y="Price",color="Commodity")+
theme_bw(20)
The first principal component condenses the correlated information in both the chicken and corn prices (perhaps the underlying cost of fuel) since it explains the most variance in the final table of corn and chicken prices.
ggplot(test.pca,aes(x=pca1))+
geom_point(aes(y=corn.price,color="Corn Price ($/kg)"))+
geom_point(aes(y=chicken.price,color="Chicken Price ($/g)"))+
labs(x="Principal Component #1",y="Price",color="Commodity")+
theme_bw(20)
The second principal component is then related to the unique information separating chicken prices from corn prices, but not to either price directly (maybe the cost of antibiotics).
ggplot(test.pca,aes(x=pca2))+
geom_point(aes(y=corn.price,color="Corn Price ($/kg)"))+
geom_point(aes(y=chicken.price,color="Chicken Price ($/g)"))+
geom_point(aes(y=5*(chicken.price-corn.price),color="Difference in Chicken-Corn"))+
labs(x="Principal Component #2",y="Price",color="Commodity")+
theme_bw(20)
Going back to a single cell, we have a distribution of \(x\) and \(y\) values.
A principal component analysis of the voxel positions will calculate two new principal components (the components themselves are the relationships between the input variables; the scores are the final values).
pca.fun<-function(in.df) {
ddply(in.df,.(val), function(c.cell) {
c.cell.cov<-cov(c.cell[,c("x","y")])
c.cell.eigen<-eigen(c.cell.cov)
c.cell.mean<-colMeans.df(c.cell[,c("x","y")])
out.df<-cbind(c.cell.mean,
data.frame(vx=c.cell.eigen$vectors[1,],
vy=c.cell.eigen$vectors[2,],
vw=sqrt(c.cell.eigen$values),
th.off=atan2(c.cell.eigen$vectors[2,],c.cell.eigen$vectors[1,]))
)
})
}
cur.cell.id<-subset(cell.lab.df,val %in% keep.vals$val)$val[100]
cur.cell.id<-58
cur.cell.df<-subset(cell.lab.df,val==cur.cell.id)
cell.pca<-pca.fun(cur.cell.df)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_segment(data=cell.pca,aes(x=x,y=y,xend=x+vx*vw,yend=y+vy*vw,
color=as.factor(round(vw*10)/10)),
arrow=arrow(length = unit(0.3,"cm")),size=2)+
labs(title="Single Cell",color="Score /\nEigenvalue")+
theme_bw(20)+coord_equal()+guides(fill=F)
We start off by calculating the covariance matrix from the list of \(x\), \(y\), and \(z\) points that make up our object of interest.
\[ COV(I_{id}) = \frac{1}{N} \sum_{\forall\vec{v}\in I_{id}} \begin{bmatrix} \vec{v}_x\vec{v}_x & \vec{v}_x\vec{v}_y & \vec{v}_x\vec{v}_z\\ \vec{v}_y\vec{v}_x & \vec{v}_y\vec{v}_y & \vec{v}_y\vec{v}_z\\ \vec{v}_z\vec{v}_x & \vec{v}_z\vec{v}_y & \vec{v}_z\vec{v}_z \end{bmatrix} \]
We then take the eigentransform of this array to obtain the eigenvectors (principal components, \(\vec{\Lambda}_{1\cdots 3}\)) and eigenvalues (scores, \(\lambda_{1\cdots 3}\))
\[ COV(I_{id}) \longrightarrow \underbrace{\begin{bmatrix} \vec{\Lambda}_{1x} & \vec{\Lambda}_{1y} & \vec{\Lambda}_{1z} \\ \vec{\Lambda}_{2x} & \vec{\Lambda}_{2y} & \vec{\Lambda}_{2z} \\ \vec{\Lambda}_{3x} & \vec{\Lambda}_{3y} & \vec{\Lambda}_{3z} \end{bmatrix}}_{\textrm{Eigenvectors}} * \underbrace{\begin{bmatrix} \lambda_1 & 0 & 0 \\ 0 & \lambda_2 & 0 \\ 0 & 0 & \lambda_3 \end{bmatrix}}_{\textrm{Eigenvalues}} * \underbrace{\begin{bmatrix} \vec{\Lambda}_{1x} & \vec{\Lambda}_{1y} & \vec{\Lambda}_{1z} \\ \vec{\Lambda}_{2x} & \vec{\Lambda}_{2y} & \vec{\Lambda}_{2z} \\ \vec{\Lambda}_{3x} & \vec{\Lambda}_{3y} & \vec{\Lambda}_{3z} \end{bmatrix}^{T}}_{\textrm{Eigenvectors}} \] The principal components tell us about the orientation of the object and the scores tell us about the corresponding magnitude (or length) in that direction.
While the eigenvalues and eigenvectors are useful in their own right, these decompositions are also available as `princomp` or `pca` functions in various languages and scale well to very large datasets.
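As a small consistency check (illustrative; note `princomp` normalizes by \(N\) rather than \(N-1\), so the values agree only approximately):

```r
# eigen-decomposition of the covariance matrix vs princomp on the same points
xy <- cur.cell.df[, c("x","y")]
sqrt(eigen(cov(xy))$values)  # axis lengths, as used in pca.fun above
princomp(xy)$sdev            # nearly identical standard deviations
```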
vec.to.ellipse<-function(pca.df) {
ddply(pca.df,.(val),function(cur.pca) {
# assume there are two vectors now
create.ellipse.points(x.off=cur.pca[1,"x"],y.off=cur.pca[1,"y"],
b=sqrt(5)*cur.pca[1,"vw"],a=sqrt(5)*cur.pca[2,"vw"],
th.off=pi/2-atan2(cur.pca[1,"vy"],cur.pca[1,"vx"]),
x.cent=cur.pca[1,"x"],y.cent=cur.pca[1,"y"])
})
}
cell.ellipse<-vec.to.ellipse(cell.pca)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_segment(data=cell.pca,aes(x=x,y=y,xend=x+vx*vw,yend=y+vy*vw,
color=as.factor(round(vw*10)/10)),
arrow=arrow(length = unit(0.3,"cm")),size=2)+
geom_path(data=cell.ellipse)+
labs(title="Single Cell",color="Score /\nEigenvalue")+
theme_bw(20)+coord_equal()+guides(fill=F)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=bbox.fun(cur.cell.df),aes(x=xi,y=yi,color="Bounding\nBox"))+
geom_path(data=cell.ellipse,aes(color="Ellipse"))+
labs(title="Single Cell",color="Shape\nAnalysis\nMethod")+
theme_bw(20)+coord_equal()+guides(fill=F)
cell.pca.all<-pca.fun(subset(cell.lab.df,val %in% keep.vals$val))
cell.ell.all<-vec.to.ellipse(cell.pca.all)
ggplot(cell.bbox.all,aes(x=x-x,y=y-y))+
geom_raster(data=mean.sub.df,aes(x=x,y=y))+
geom_path(data=cell.ell.all,aes(x=x-x.cent,y=y-y.cent),color="red")+
geom_point(color="red",size=2)+
geom_segment(data=cell.pca.all,aes(xend=vx*vw,yend=vy*vw),
arrow=arrow(length = unit(0.3,"cm")),color="red")+
facet_wrap(~val,ncol=14)+
labs(x="x - COM",y="y - COM")+
theme_bw(15)+coord_equal()
pca.aiso.all<-ddply(cell.pca.all,.(val),function(cur.pca) {
lon.ax<-max(cur.pca$vw)
sho.ax<-min(cur.pca$vw)
data.frame(e.aiso=(lon.ax-sho.ax)/lon.ax)
})
both.aiso<-merge(pca.aiso.all,cell.bbox.all,by="val")
no.aiso.vals<-subset(both.aiso,aiso<0.01 & e.aiso>0.2)$val
ggplot(both.aiso,aes(x=aiso,y=e.aiso))+
geom_point(aes(color=ifelse(aiso<0.01,"lost information","normal")))+
geom_smooth()+
labs(x="Extents / BB Anisotropy",y="Elliptical Anisotropy",color="Points")+
coord_equal()+theme_bw(20)
We see a general, albeit weak, correlation between the two measures. The most concerning portion, however, is the left side, where the extents / bounding box method reports zero anisotropy while the elliptical method reports substantial amounts of it.
ggplot(subset(cell.bbox.all,val %in% no.aiso.vals),aes(x=x-x,y=y-y))+
geom_raster(data=subset(mean.sub.df,val %in% no.aiso.vals),aes(x=x,y=y))+
geom_path(aes(x=xi-x,y=yi-y,color="BB"))+
geom_path(data=subset(cell.ell.all,val %in% no.aiso.vals),
aes(x=x-x.cent,y=y-y.cent,color="Ellipse"))+
geom_point(color="red",size=2)+
geom_segment(data=subset(cell.pca.all,val %in% no.aiso.vals),
aes(xend=vx*vw,yend=vy*vw),
arrow=arrow(length = unit(0.3,"cm")),color="red")+
facet_wrap(~val)+
labs(x="x - COM",y="y - COM",color="Model")+
theme_bw(15)+coord_equal()
The models we have covered are all applicable to both 2D and 3D images; the primary difference with 3D images is the extra dimension to consider.
xg<-seq(-10,10,length.out=41)
make.ellipse.3d<-function(a,b,c,xg) {
ell.3d<-expand.grid(x=xg,y=xg,z=xg)
ell.3d$val<-with(ell.3d,(x/a)^2+(y/b)^2+(z/c)^2 < 1)
ell.3d
}
cloud(z~x*y,data=subset(make.ellipse.3d(9,3,10,xg),val),xlim=c(-10,10),ylim=c(-10,10),zlim=c(-10,10))
It can also be shown as a series of 2D slices.
ggplot(subset(make.ellipse.3d(9,3,10,xg),val),aes(x=x,y=y))+
geom_tile(color="blue")+
facet_wrap(~z)+
coord_equal()+
xlim(c(-10,10))+ylim(c(-10,10))+
theme_bw(10)
Anisotropy applies to these samples as well but it only gives us information about the shortest and the longest dimensions. Is that enough?
Each of these images has exactly the same anisotropy, because the shortest and longest semi-axis lengths remain fixed (3 and 10); only the intermediate axis changes.
xg<-seq(-10,10,length.out=41)
b.vals<-seq(3,10,length.out=4)
cloud(z~x*y | as.factor(b),
data=ldply(b.vals,function(b) cbind(subset(make.ellipse.3d(b,10,3,xg),val),b=b)),
xlim=c(-10,10),ylim=c(-10,10),zlim=c(-10,10))
If we take a slice through the image, we can imagine the final shape as a very thin small rod when the second dimension is 3 (short in two directions and long in the other) and as a pancake when it is 10 (short in one direction and long in the other two).
ggplot(ldply(rev(b.vals),function(b) cbind(subset(make.ellipse.3d(b,10,3,xg),val & z==0),b=b)),
aes(x=x,y=y))+
geom_tile(aes(fill=as.factor(round(b*10)/10)),alpha=1)+
facet_wrap(~z)+
coord_equal()+
xlim(c(-10,10))+ylim(c(-10,10))+
labs(title="Profile Cut-through of Ellipse",fill="Second\nDimension\nLength")+
theme_bw(20)
We can thus introduce a new metric for assessing this second-degree anisotropy in the object, which we shall, somewhat more formally, call oblateness.
\[ \textrm{Ob} = 2\frac{\lambda_{2}-\lambda_{1}}{\lambda_{3}-\lambda_{1}}-1 \]
As with anisotropy, the value is bounded, here between -1 (prolate, rod-like) and 1 (oblate, pancake-like).
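A minimal helper (not part of the original code) makes the bounds concrete:

```r
# oblateness from the sorted axis scores lambda1 <= lambda2 <= lambda3
oblateness <- function(l1, l2, l3) 2*(l2-l1)/(l3-l1) - 1
oblateness(3, 3, 10)   # -1: rod-like (prolate)
oblateness(3, 10, 10)  # +1: pancake-like (oblate)
```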
ob.vals<-data.frame(b=b.vals,obl=2*(b.vals-3)/(10-3)-1)
ggplot(ob.vals,aes(x=b,y=obl))+
geom_line()+
labs(x="Second Dimension Length",y="Oblateness")+
theme_bw(20)
Many physical and chemical processes occur at surfaces and interfaces, and consequently these structures are important in materials science and biology. For this lecture, surface and interface will be used interchangeably; both refer to the boundary between two different materials (calcified bone and soft tissue, magma and water, liquid and gas). Through segmentation we have identified the unique phases in the sample under investigation.
We see that the effects of dilation and erosion are strongly related to the surface area of an object: the more surface area, the larger the effect of a single dilation or erosion step.
make.circle<-function(nx,r,nscale=1) {
ny<-nx
grad.im<-expand.grid(x=c(-nx:nx)/nscale,y=c(-ny:ny)/nscale)
grad.im$val<-with(grad.im,(x^2+y^2) < r^2) # pixels inside the circle of radius r (comparison restored after markup loss)
grad.im
}
calc.eros.cnt<-function(nx,r,nscale=1,im.fun=make.circle) {
in.cir<-im.fun(nx,r,nscale=nscale)
cir.im<-df.to.im(in.cir)
data.frame(nx=nx,r=r,nscale=nscale,
dvolume=sum(dilate(cir.im>0,makeBrush(3,"diamond"))>0),
evolume=sum(erode(cir.im>0,makeBrush(3,"diamond"))>0),
volume=sum(cir.im>0)
)
}
max.rad<-20
r.list<-seq(0.2,max.rad,length.out=20)
r.table<-ldply(r.list,function(r) calc.eros.cnt(max.rad+1,r,1))
r.table$model.perimeter<-with(r.table,2*pi*r)
r.table$estimated.perimeter<-with(r.table,
((dvolume-volume)+(volume-evolume))/2)
ggplot(r.table,aes(x=r))+
geom_line(aes(y=estimated.perimeter,color="Morphological Estimation"))+
geom_line(aes(y=model.perimeter,color="Model Circumference"))+
labs(x="Radius",y="Perimeter",color="Source")+
theme_bw(20)
Constructing a mesh for an image provides very different information than the image data itself. This is most crucial when looking at physical processes like deformation.
While images are helpful for visualization, we rarely have models for quantifying how difficult it is to turn a pixel off.
base.grid<-expand.grid(x=c(-10:10),y=c(-10:10))
base.shape.undef<-cbind(base.grid,val=with(base.grid,abs(x)<6 & abs(y)<6))
def.grid<-rbind(
cbind(base.shape.undef,type="Before"),
cbind(base.grid,val=with(base.grid,abs(x)<10 & abs(y)<4),type="After")
)
ggplot(def.grid,
aes(x=x,y=y,fill=ifelse(val,"on","off")))+
geom_tile(alpha=0.80,color="black")+
coord_equal()+facet_wrap(~type)+
xlim(c(-10,10))+ylim(c(-10,10))+
labs(title="Pixel-based Deformation",fill="Value")+
theme_bw(20)
If the image is turned into a mesh, we now have a list of vertices and edges, and for these vertices and edges we can define forces. For example, when looking at stress-strain relationships in mechanics using Hooke's law \[ \vec{F}=k (\vec{x}_0-\vec{x}) \] the force needed to stretch one of these edges is proportional to how far it is stretched.
df.to.mesh<-function(in.df,d.size.x=0.5,d.size.y=0.5,make.seg=F) {
corner.pts<-expand.grid(dx=c(-d.size.x,d.size.x),dy=c(-d.size.y,d.size.y))
min.x<-min(in.df$x)
max.x<-max(in.df$x)
min.y<-min(in.df$y)
max.y<-max(in.df$y)
mesh.pts<-ddply(in.df,.(x,y),function(cur.df) {
xy.cols<-c("x","y")
pos.data<-cur.df[1,xy.cols]
n.data<-cur.df[,!(names(cur.df) %in% xy.cols)]
ddply(corner.pts,.(dx,dy),function(c.offset) {
cbind(x=pos.data$x+c.offset$dx[1],y=pos.data$y+c.offset$dy[1],n.data)
})
})
if(make.seg) {
grid.pts<-expand.grid(dx=c(-d.size.x,0,d.size.x),dy=c(-d.size.y,0,d.size.y))
grid.pts$seg.len<-with(grid.pts,sqrt(dx^2+dy^2))
subset(
ddply(subset(grid.pts,((abs(dx)==0) + (abs(dy)==0))==1),.(dx,dy,seg.len),
function(c.offset) {
cbind(xend=mesh.pts$x+c.offset$dx[1],yend=mesh.pts$y+c.offset$dy[1],
mesh.pts)
}),
xend<max.x & xend>min.x & yend<max.y & yend>min.y) # comparisons restored after markup loss: keep segments ending inside the grid
} else {
mesh.pts
}
}
undef.mesh<-df.to.mesh(subset(base.shape.undef,val),make.seg=T)
base.shape.def<-base.shape.undef
base.shape.def$x<-base.shape.def$x*10/6
base.shape.def$y<-base.shape.def$y*4/6
def.mesh<-df.to.mesh(subset(base.shape.def,val),
d.size.x=0.5*10/6,d.size.y=0.5*4/6,make.seg=T)
ggplot(rbind(cbind(undef.mesh,type="Before"),
cbind(def.mesh,type="After")),
aes(x=x,y=y))+
geom_segment(aes(xend=xend,yend=yend,color=sign(seg.len-0.5)),size=2)+
geom_point(size=2,color="green",pch=20)+
coord_equal()+facet_wrap(~type)+
scale_color_gradient2(mid="black",low="red",high="blue",space="Lab")+
#xlim(c(-10,10))+ylim(c(-10,10))+
labs(title="Mesh-based Deformation",color="(x-x0)")+
theme_bw(20)
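With the mesh in hand, Hooke's law can be applied edge by edge. A hedged sketch, assuming a hypothetical spring constant `k` of 1 and the 0.5-pixel rest length used in the plot above:

```r
# force on each deformed edge; negative values indicate stretched edges
k <- 1
def.mesh$force <- with(def.mesh, k*(0.5 - seg.len))
summary(def.mesh$force)
```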
Since we used voxels to image and identify the volume, we can use the voxels themselves as an approximation for the surface of the structure.
From this we can create a mesh using a wide variety of methods, of which we will only graze the surface here (see http://en.wikipedia.org/wiki/Image-based_meshing).
Voxels are very poor approximations for the surface and are very rough: their normals always point along the x, y, or z axis, never in between. Because of these inherently orthogonal surface normals, any analysis which uses the surface normal to calculate another value (growth, curvature, etc.) will be very inaccurate at best and very wrong at worst.
The image is processed one voxel at a time, and the neighborhood (not quite the same as the morphological definition) is checked at every voxel. From this configuration of values, faces are added to the mesh to form the simplest surface which would explain the values; this is the marching cubes approach.
Marching tetrahedra is, for some applications, a better-suited approach.
So while bounding box and ellipse-based models are useful for many objects and cells, they do a very poor job with samples like the one below.
cur.cell.df<-subset(cell.lab.df,val==155)
cell.pca<-pca.fun(cur.cell.df)
cell.ellipse<-vec.to.ellipse(cell.pca)
ggplot(cur.cell.df,aes(x=x,y=y))+
geom_tile(color="black",fill="grey")+
geom_path(data=bbox.fun(cur.cell.df),aes(x=xi,y=yi,color="Bounding\nBox"))+
geom_path(data=cell.ellipse,aes(color="Ellipse"))+
labs(title="Single Cell",color="Shape\nAnalysis\nMethod")+
theme_bw(20)+coord_equal()+guides(fill=F)